{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "%matplotlib inline"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\n# Comparing randomized search and grid search for hyperparameter estimation\n\n\nCompare randomized search and grid search for optimizing hyperparameters of a\nlinear SVM trained with SGD (`SGDClassifier` with hinge loss).\nThe parameters that influence the learning (`average`, `l1_ratio` and `alpha`)\nare searched simultaneously.\n\nThe randomized search and the grid search explore exactly the same space of\nparameters. The resulting parameter settings are quite similar, while the run\ntime for randomized search is drastically lower.\n\nThe performance may be slightly worse for the randomized search; this is likely\ndue to a noise effect and would not carry over to a held-out test set.\n\nNote that in practice, one would not search over this many different parameters\nsimultaneously using grid search, but pick only the ones deemed most important.\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "from time import time\n",
        "\n",
        "import numpy as np\n",
        "import scipy.stats as stats\n",
        "\n",
        "from sklearn.datasets import load_digits\n",
        "from sklearn.linear_model import SGDClassifier\n",
        "from sklearn.model_selection import GridSearchCV, RandomizedSearchCV\n",
        "\n",
        "try:\n",
        "    # Public SciPy API, available from SciPy 1.4 onwards.\n",
        "    from scipy.stats import loguniform\n",
        "except ImportError:\n",
        "    # Older SciPy: fall back to scikit-learn's private compatibility shim.\n",
        "    from sklearn.utils.fixes import loguniform\n",
        "\n",
        "# Fixed seed so that a fresh 'Restart & Run All' draws the same candidates\n",
        "# and reports the same scores.\n",
        "SEED = 0\n",
        "\n",
        "# get some data\n",
        "X, y = load_digits(return_X_y=True)\n",
        "\n",
        "# build a classifier\n",
        "clf = SGDClassifier(loss='hinge', penalty='elasticnet',\n",
        "                    fit_intercept=True, random_state=SEED)\n",
        "\n",
        "\n",
        "# Utility function to report best scores\n",
        "def report(results, n_top=3):\n",
        "    \"\"\"Print the top-ranked candidates of a fitted search.\n",
        "\n",
        "    Parameters\n",
        "    ----------\n",
        "    results : dict\n",
        "        ``cv_results_`` of a fitted search object. The keys\n",
        "        'rank_test_score', 'mean_test_score', 'std_test_score' and\n",
        "        'params' are read.\n",
        "    n_top : int, default=3\n",
        "        Lowest rank to report; ranks 1..n_top are printed and every\n",
        "        candidate tied at a rank is listed.\n",
        "    \"\"\"\n",
        "    # asarray so the elementwise comparison also works for a plain list.\n",
        "    ranks = np.asarray(results['rank_test_score'])\n",
        "    for i in range(1, n_top + 1):\n",
        "        for candidate in np.flatnonzero(ranks == i):\n",
        "            print(\"Model with rank: {0}\".format(i))\n",
        "            print(\"Mean validation score: {0:.3f} (std: {1:.3f})\"\n",
        "                  .format(results['mean_test_score'][candidate],\n",
        "                          results['std_test_score'][candidate]))\n",
        "            print(\"Parameters: {0}\".format(results['params'][candidate]))\n",
        "            print(\"\")\n",
        "\n",
        "\n",
        "# specify parameters and distributions to sample from\n",
        "param_dist = {'average': [True, False],\n",
        "              'l1_ratio': stats.uniform(0, 1),\n",
        "              'alpha': loguniform(1e-4, 1e0)}\n",
        "\n",
        "# run randomized search\n",
        "n_iter_search = 20\n",
        "random_search = RandomizedSearchCV(clf, param_distributions=param_dist,\n",
        "                                   n_iter=n_iter_search, random_state=SEED)\n",
        "\n",
        "start = time()\n",
        "random_search.fit(X, y)\n",
        "print(\"RandomizedSearchCV took %.2f seconds for %d candidate\"\n",
        "      \" parameter settings.\" % ((time() - start), n_iter_search))\n",
        "report(random_search.cv_results_)\n",
        "\n",
        "# use a full grid over all parameters\n",
        "param_grid = {'average': [True, False],\n",
        "              'l1_ratio': np.linspace(0, 1, num=10),\n",
        "              'alpha': np.power(10, np.arange(-4, 1, dtype=float))}\n",
        "\n",
        "# run grid search\n",
        "grid_search = GridSearchCV(clf, param_grid=param_grid)\n",
        "start = time()\n",
        "grid_search.fit(X, y)\n",
        "\n",
        "print(\"GridSearchCV took %.2f seconds for %d candidate parameter settings.\"\n",
        "      % (time() - start, len(grid_search.cv_results_['params'])))\n",
        "report(grid_search.cv_results_)"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.6.9"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}